import logging
import os
import re
import urllib.parse
from urllib import parse

from checker.utils_config import init_global_config_setting, init_output_path

PARAMETER_DICT = init_global_config_setting()  # holds the customized parameter configuration
# Absolute path of the directory that contains this module.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def transfer_file_path_to_raw_url(file_name, organization, repo_name, org_url=None):
    """Convert a local path inside a repository into its raw-file URL.

    Everything up to and including the first occurrence of ``repo_name`` in
    the path is replaced by ``<org_url><repo_name>/raw/master``.

    Args:
        file_name: Local file path; backslashes are treated as separators.
        organization: Organization name, only used to build the default
            ``org_url``.
        repo_name: Repository name; it must occur in ``file_name``.
        org_url: Base URL of the organization, ending with ``/``. Defaults
            to ``https://gitee.com/<organization>/``.

    Returns:
        The raw URL as a string.

    Raises:
        AttributeError: If ``repo_name`` does not occur in ``file_name``.
    """
    if not org_url:
        org_url = 'https://gitee.com/' + organization + '/'
    file_name = file_name.replace('\\', '/')
    # repo_name is literal text, not a pattern: escape it so names such as
    # "my.repo" or "c++" are matched verbatim.
    local_prefix = re.match(f'^.*?{re.escape(repo_name)}', file_name)
    # Cut the matched prefix off by position instead of str.replace(), which
    # would also rewrite any later repetition of the same text.
    path_in_repo = file_name[local_prefix.end():]
    return org_url + repo_name + '/raw/master' + path_in_repo


def form_name_for_org_repos_file(organization):
    """Return the path of the repository-list workbook for ``organization``.

    The file name is the organization name followed by the fixed suffix
    ``_组织仓库清单.xlsx``; it is placed in the directory returned by
    ``init_output_path()``.
    """
    workbook_name = ''.join((organization, '_组织仓库清单.xlsx'))
    return os.path.join(init_output_path(), workbook_name)


def get_org_name_from_repo_url(repo_url):
    """Extract the organization name from a Gitee repository URL.

    Args:
        repo_url: URL expected to contain ``https://gitee.com/<org>``.

    Returns:
        The path segment that directly follows ``https://gitee.com/``, or an
        empty string when the URL is not a Gitee URL or has no such segment.
    """
    prefix = 'https://gitee.com/'
    if prefix not in repo_url:
        return ''
    # Escape the prefix so the "." in the host name is matched literally.
    match = re.search(rf'(?<={re.escape(prefix)})[^/]+(?=/|$)', repo_url)
    # A bare "https://gitee.com/" has no organization segment: report "not
    # found" the same way as for non-Gitee URLs instead of crashing.
    return match.group() if match else ''


def get_repo_name_from_repo_url(repo_url):
    """Extract the repository name (last path segment) from a repository URL.

    An optional ``.git`` suffix and an optional trailing ``/`` are ignored.

    Args:
        repo_url: Repository URL such as ``https://gitee.com/org/repo.git``.

    Returns:
        The repository name, or an empty string when the URL contains no
        ``/``-separated segment to take it from.
    """
    # The lookahead is anchored to the end of the URL so that ".git" is only
    # stripped as a suffix; names such as "my.github.io" stay intact.
    match = re.search(r'(?<=/)[^/]+?(?=(?:\.git)?/?$)', repo_url)
    return match.group() if match else ''


def get_repo_name_from_file_url(file_url, organization):
    """Extract the repository name from a Gitee file URL of a known organization.

    Args:
        file_url: URL expected to contain
            ``https://gitee.com/<organization>/<repo>``.
        organization: Organization name that must precede the repository.

    Returns:
        The path segment that directly follows the organization, or an empty
        string when ``organization`` is empty, the URL does not belong to it,
        or no repository segment follows.
    """
    prefix = f'https://gitee.com/{organization}/'
    if not organization or prefix not in file_url:
        return ''
    # The prefix is literal text; escaping keeps the lookbehind fixed-width
    # and valid even when the organization contains regex metacharacters.
    match = re.search(rf'(?<={re.escape(prefix)})[^/]+?(?=$|/)', file_url)
    return match.group() if match else ''


def form_absolute_local_path(relative_path, filepath_absolute, find_root_path=None):
    """Resolve a link found in a local file to a local path.

    Args:
        relative_path: Link target as written in the file. Leading ``/``,
            ``\\`` and ``#`` characters and anything from the next ``#`` on
            are dropped before joining.
        filepath_absolute: Path of the file that contains the link. A path
            ending with a separator is treated as a directory.
        find_root_path: Callable that maps the containing directory to the
            root directory used for links starting with ``/``. Defaults to
            ``get_oh_repo_local_root_path``.

    Returns:
        The base directory joined with the cleaned link target. When nothing
        is left of the link after cleaning, the base directory itself (with
        a trailing separator) is returned.
    """
    if find_root_path is None:
        find_root_path = get_oh_repo_local_root_path
    pathname = os.path.abspath(filepath_absolute)
    if os.path.basename(filepath_absolute):
        # Take the parent directory structurally. Removing the file name
        # with str.replace() would also delete identically named
        # directories higher up in the path.
        pathname = os.path.dirname(pathname)
    # Keep a trailing separator: the default root finder matches on it.
    current_path = os.path.join(pathname, '')
    if relative_path.startswith('/'):
        current_path = find_root_path(current_path)
    target = re.search(r'(?=[^\/\\#]).+?(?=$|#)', relative_path)
    # Links such as "", "/" or "#" have no path part left.
    cleaned_path = target.group() if target else ''
    return os.path.join(current_path, cleaned_path)


def get_oh_repo_local_root_path(path):
    """Return the local root directory of the repository that contains ``path``.

    The root is the output directory (``init_output_path()``) followed by
    exactly two further path components and a trailing separator.
    NOTE(review): the two components are presumably ``<organization>`` and
    ``<repository>`` -- confirm against how repositories are downloaded.

    Args:
        path: Absolute path located below the output directory; it must
            contain a separator after the second component.

    Returns:
        The leading part of ``path`` up to and including the separator that
        follows the second component.

    Raises:
        AttributeError: If ``path`` does not have that layout.
    """
    output_path = os.path.abspath(init_output_path())
    # Escape the directory so characters such as "." or "(" in it are
    # matched literally, and use the platform separator instead of a
    # hard-coded backslash.
    sep = re.escape(os.sep)
    regex = re.escape(output_path) + sep + '.+?' + sep + '.+?' + sep
    match = re.match(regex, path)
    return match.group()


def form_absolute_url_path(relative_url, absolute_url, find_root_path=None):
    """Resolve a link found on a web page to an absolute URL.

    A link that starts with ``/`` is resolved against the root URL returned
    by ``find_root_path`` (default: ``get_oh_repo_url_root_path``) after its
    leading slash is removed; any other link is resolved against
    ``absolute_url`` itself. The join is done by ``urllib.parse.urljoin``.
    """
    resolve_root = get_oh_repo_url_root_path if find_root_path is None else find_root_path
    base_url, link = absolute_url, relative_url
    if link.startswith('/'):
        link = link[1:]
        base_url = resolve_root(base_url)
    return parse.urljoin(base_url, link)


def get_oh_repo_url_root_path(url):
    """Return the leading part of a Gitee URL that spans four path segments.

    The result is ``https://gitee.com/`` followed by the first four path
    segments of ``url`` and a trailing ``/``.
    NOTE(review): the four segments are presumably
    ``<org>/<repo>/<blob|raw|tree>/<branch>`` -- confirm against callers.

    Args:
        url: URL starting with ``https://gitee.com/``.

    Returns:
        The matched URL prefix, ending with ``/``.

    Raises:
        AttributeError: If ``url`` does not start with that layout.
    """
    # The dot is escaped so that only the real host name is accepted.
    match = re.match(r'https://gitee\.com/[^/]+/[^/]+/[^/]+/[^/]+/', url)
    return match.group()


def parse_url_with_encode_lang(url_with_encode_language):
    """Return the URL with its percent-escapes decoded by ``urllib.parse.unquote``."""
    return urllib.parse.unquote(url_with_encode_language)


def parse_lang_to_url_but_encoded(url_with_language):
    """Return the text percent-encoded by ``urllib.parse.quote`` (default safe set)."""
    return urllib.parse.quote(url_with_language)


def loop_to_check_equals_with_list_word(sentence, word_list, msg=''):
    """Report whether ``sentence`` equals any entry of ``word_list``.

    When a match is found, ``msg`` is logged once as a warning and ``True``
    is returned; otherwise nothing is logged and ``False`` is returned.
    """
    matched = any(candidate == sentence for candidate in word_list)
    if matched:
        logging.warning(msg)
    return matched


def loop_to_check_contains_list_word(sentence, word_list, msg=''):
    """Report whether ``sentence`` contains any entry of ``word_list``.

    When a match is found, ``msg`` is logged once as a warning and ``True``
    is returned; otherwise nothing is logged and ``False`` is returned.
    """
    for keyword in word_list:
        if keyword not in sentence:
            continue
        logging.warning(msg)
        return True
    return False


def loop_to_check_ends_with_list_word(sentence, word_list, msg=''):
    """Report whether ``sentence`` ends with any entry of ``word_list``.

    When a match is found, ``msg`` is logged once as a warning and ``True``
    is returned; otherwise nothing is logged and ``False`` is returned.
    """
    matched = any(sentence.endswith(suffix) for suffix in word_list)
    if matched:
        logging.warning(msg)
    return matched


def loop_to_check_starts_with_list_word(sentence, word_list, msg=''):
    """Report whether ``sentence`` starts with any entry of ``word_list``.

    When a match is found, ``msg`` is logged once as a warning and ``True``
    is returned; otherwise nothing is logged and ``False`` is returned.
    """
    for prefix in word_list:
        if not sentence.startswith(prefix):
            continue
        logging.warning(msg)
        return True
    return False


def uniform_organization_name(organization):
    """Normalize user input into a bare, lower-case organization name.

    Surrounding whitespace is stripped, the text is lower-cased, the
    ``https://gitee.com`` prefix is removed, and every space, ``/`` and
    ``\\`` is deleted.

    Args:
        organization: Organization name or Gitee organization URL.

    Returns:
        The normalized organization name.
    """
    org = organization.strip().lower().replace('https://gitee.com', '')
    # str.translate works in a single pass, so mapping "\\" to "/" and "/"
    # to None left a "/" behind for every backslash. Delete all three
    # characters directly instead.
    return org.translate(str.maketrans('', '', '\\/ '))


def find_target_set_given_content_and_pattern(content, pattern):
    """Return the distinct results of ``pattern.findall(content)`` as a set."""
    return set(pattern.findall(content))


def is_target_no_need_to_check(target):
    """Tell whether ``target`` matches any of the configured skip rules.

    The rules are tried in a fixed order -- exact URL, keyword, suffix,
    prefix -- and evaluation stops at the first rule that matches.
    """
    skip_rules = (
        url_is_set_to_ignore,
        url_contains_keyword_to_ignore,
        url_end_with_postfix_to_ignore,
        url_start_with_prefix_to_ignore,
    )
    for rule in skip_rules:
        if rule(target):
            return True
    return False


def url_is_set_to_ignore(target):
    """Check ``target`` against the exact URLs listed under ``url_to_skip``.

    Returns ``True`` (after a warning is logged) when ``target`` equals one
    of the configured entries, ``False`` otherwise.
    """
    skip_notice = f'链接{target}为配置文件中，设定跳过的链接，这里判定不是目标链了'
    skipped_urls = PARAMETER_DICT['url_to_skip']
    return loop_to_check_equals_with_list_word(target, skipped_urls, skip_notice)


def url_contains_keyword_to_ignore(target):
    """Check ``target`` against the keywords listed under ``keyword_to_skip``.

    Returns ``True`` (after a warning is logged) when ``target`` contains
    one of the configured keywords, ``False`` otherwise.
    """
    skip_notice = f'链接{target}含有配置文件中，设定跳过的关键字，这里判定不是目标链了'
    skipped_keywords = PARAMETER_DICT['keyword_to_skip']
    return loop_to_check_contains_list_word(target, skipped_keywords, skip_notice)


def url_end_with_postfix_to_ignore(target):
    """Check ``target`` against the suffixes listed under ``postfix_to_skip``.

    Returns ``True`` (after a warning is logged) when ``target`` ends with
    one of the configured suffixes, ``False`` otherwise.
    """
    skip_notice = f'链接{target}含有配置文件中，设定跳过的尾缀，这里判定不是目标链了'
    skipped_suffixes = PARAMETER_DICT['postfix_to_skip']
    return loop_to_check_ends_with_list_word(target, skipped_suffixes, skip_notice)


def url_start_with_prefix_to_ignore(target):
    """Check ``target`` against the prefixes listed under ``prefix_to_skip``.

    Returns ``True`` (after a warning is logged) when ``target`` starts with
    one of the configured prefixes, ``False`` otherwise.
    """
    skip_notice = f'链接{target}含有配置文件中，设定跳过的前缀，这里判定不是目标链了'
    skipped_prefixes = PARAMETER_DICT['prefix_to_skip']
    return loop_to_check_starts_with_list_word(target, skipped_prefixes, skip_notice)


def get_plain_titles_in_content(content):
    """Collect the Markdown headings of ``content`` in plain form.

    A heading is a line starting with one to six ``#`` followed by a space.
    Each heading is reduced to the plain form used for anchor matching:
    punctuation removed, lower-cased, ``-`` read as a space, and surrounding
    whitespace stripped.

    Args:
        content: Markdown text.

    Returns:
        A list of plain headings. Every heading appears twice: once with the
        text of ``<sup>`` tags kept and once with those tags removed.
    """
    # Drop empty anchor tags (<a id=></a>, <span id=></span>, <a href=></a>);
    # per the original author's note they are not usable as anchors on Gitee.
    content = re.sub(r'<([a-z0-9]+).*?></\1>', '', content)
    # Per the original author's note, anchor matching treats digits and other
    # characters inside <sup> differently, so both variants are kept. The
    # capture is non-greedy so that several <sup> tags on one line are each
    # unwrapped on their own.
    with_sup_text = re.sub(r'<sup>(.*?)</sup>', r'\1', content)
    without_sup = re.sub(r'<sup>.*?</sup>', '', content)
    # Join with a newline so the last line of the first copy cannot merge
    # with the first line of the second copy into a bogus heading.
    content = with_sup_text + '\n' + without_sup
    title_list = re.findall(r'^#{1,6} .*', content, re.M)
    mark_to_remove = ['#', '.', ',', ':', '`', '/', '@', '\\', '+', '(', ')', '[', ']', '\'', '\"', '!', '|', '?',
                      '（', '）', '，', '。', '“', '”', '’', '‘', '：', '！', '·', '~', '？']
    # Every mark is a single character, so one translate() pass removes them all.
    strip_marks = str.maketrans('', '', ''.join(mark_to_remove))
    return [title.translate(strip_marks).lower().replace('-', ' ').strip()
            for title in title_list]


def is_anchor_valid_in_content(anchor_point, content):
    """Check whether an anchor link has a matching target in ``content``.

    This is deliberately limited to the anchor mechanisms Gitee currently
    supports, i.e. three syntaxes and their basic compatible variants:

    1.
    # Usage <a name="usage"></a>
    [Usage](#usage)
    2.
    <h1 id="usage">Usage</h1>
    [Usage](#usage)
    3.
    # Usage
    [Usage](#usage)

    Args:
        anchor_point: Anchor including its leading ``#``.
        content: Text of the document the anchor points into.

    Returns:
        ``True`` when a matching ``<a name=...>`` or ``<hN id=...>`` tag
        exists, or when a non-empty plain heading is contained in the anchor
        text; ``False`` otherwise.
    """
    origin_text = anchor_point[1:]
    # A percent-encoded anchor (e.g. encoded Chinese) may also match.
    decode_text = parse_url_with_encode_lang(origin_text)
    text_set = {origin_text, decode_text}
    # Regular HTML anchors.
    for text in text_set:
        # The anchor is literal text; escaping prevents characters such as
        # "+" or "(" from breaking or widening the pattern.
        literal = re.escape(text)
        valid_marks = (
            rf'<a name[ ]*=[ ]*[\'\"]?{literal}[\'\"]?>.*?</a>',
            rf'<h([1-8]) id[ ]*=[ ]*[\'\"]?{literal}[\'\"]?>.*?</h\1>',
        )
        if any(re.search(valid_mark, content) for valid_mark in valid_marks):
            return True
    # Heading-derived anchors: compare against the plain form of every
    # heading. Empty plain headings (headings made of punctuation only) are
    # skipped, because "" is contained in every string and would validate
    # any anchor.
    plain_title_list = [title for title in get_plain_titles_in_content(content) if title]
    for text in text_set:
        text = text.replace('-', ' ')  # '-' in an anchor can stand for a space
        if any(plain_title in text for plain_title in plain_title_list):
            return True
    return False


def get_raw_file_url(file_url):
    """Switch a Markdown ``blob`` page URL to its ``raw`` counterpart.

    For Markdown targets the raw file has to be fetched; otherwise unrelated
    URLs from the surrounding web page would be reported as well.

    Args:
        file_url: URL of a file page.

    Returns:
        ``file_url`` with its first ``/blob/`` path segment replaced by
        ``/raw/`` when the URL contains both ``/blob/`` and ``.md``;
        otherwise ``file_url`` unchanged.
    """
    # Only the first "/blob/" path segment is rewritten, so a "blob" that is
    # part of an organization, repository, directory or file name survives.
    if '/blob/' in file_url and '.md' in file_url:
        file_url = file_url.replace('/blob/', '/raw/', 1)
    return file_url


# Running this module as a script is a no-op.
if __name__ == '__main__':
    pass
